################################################################################ 
#
# The distribution of hate speech and its implications for content moderation
# PSRM - Replication package
# Figure G8, Tables G18-G21
#
################################################################################# 


################################################################################ 
#  LIBRARIES
################################################################################ 

library(dplyr)    
library(readr)     
library(estimatr)
library(glmnet)     
library(hdm)

# Clear the workspace, keeping only objects whose names start with "wd" or are
# exactly "setsave" (presumably paths/settings defined by a calling script --
# confirm). The helper vector itself does not match the pattern, so it is
# removed by the same rm() call.
objects_to_keep <- ls(pattern = "^wd|^setsave$")
rm(list = setdiff(ls(), objects_to_keep))

# Fix the random-number generator state.
set.seed(42)


################################################################################ 
#   DATA AND FOLDER
################################################################################ 

# Read the cleaned analysis file from the processed-data folder.
data <- read.csv(
  paste0(wd_data_processed, "/for_analysis/clean_for_analysis.csv")
)

# Rename the German-named target-group columns to English names with a `t_`
# prefix.
data <- rename(
  data,
  t_gender = geschlecht,
  t_age = alter,
  t_sexuality = sexualitat,
  t_religion = religion,
  t_nationality = nationalitat,
  t_disability = beeintrachtigung,
  t_socioeconomic_status = sozialer_status,
  t_politics = politik,
  t_other = andere
)

# Names of the target-group columns, in the order used throughout the script.
tchars <- c(
  "t_gender", "t_age", "t_sexuality", "t_religion", "t_nationality",
  "t_disability", "t_socioeconomic_status", "t_politics", "t_other"
)

# Missing values in the target-group columns are set to 0.
data[tchars] <- lapply(data[tchars], function(column) {
  column[is.na(column)] <- 0
  column
})


# Keep only rows where all four exclusion flags (attrition, not authorised,
# suspended, protected) equal 0. which() also drops rows where the combined
# condition evaluates to NA.
data <- data[
  which(
    data$attrition == 0 &
      data$not_auth_sofar == 0 &
      data$suspended_sofar == 0 &
      data$protected_sofar == 0
  ),
]


################################################################################ 
#   Prepare
################################################################################ 

# Outcome columns and the display names that accompany them.
outvars <- c("anchor_deleted_12h", "no_hate_tweets_post_85", "hate_rate_85",
             "post_tweets_h_avg")
outvars_name <- c("Xenophobic Tweet Deleted <12h", "# Hate Tweets",
                  "Share of daily hate", "Probability of Hate Speech")

# Treatment arms and their display labels.
treatments <- c("alert", "consequences", "empathy")
treatments_lab <- c("Alerting of Hate Speech", "Warning of Consequences",
                    "Empathy")


# Standardise every outcome (centre on the mean, divide by the standard
# deviation). scale() returns a one-column matrix carrying "scaled:center" and
# "scaled:scale" attributes; as.numeric() strips both so the data frame keeps
# plain numeric vector columns. The standardised values are unchanged.
data[outvars] <- lapply(data[outvars], function(x) as.numeric(scale(x)))


# Print the frequency table of each target-group column.
# Note: t_other is recoded further down in this script, so the counts printed
# here predate that recode.
for (char in tchars) {
  print(paste0("----- ", char, " -----"))
  print(table(data[char]))
}

################################################################################ 
#   Descriptives
################################################################################ 

# Rows whose target-group columns sum to zero are assigned to "Other".
data$t_other[rowSums(data[tchars]) == 0] <- 1

library(ggplot2)

# Axis labels for the target-group columns.
labels_dict <- c(
  t_gender = "Gender",
  t_age = "Age",
  t_sexuality = "Sexuality",
  t_religion = "Religion",
  t_nationality = "Nationality",
  t_disability = "Disability",
  t_socioeconomic_status = "Socioeconomic Status",
  t_politics = "Politics",
  t_other = "Other"
)

# Column mean of each target-group column (the share of flagged rows if the
# columns are 0/1 indicators -- confirm against the data).
mean_df <- data.frame(
  variable = tchars,
  mean = colMeans(data[tchars], na.rm = TRUE)
)

# Order the bars from the largest to the smallest mean.
mean_df$variable <- factor(
  mean_df$variable,
  levels = mean_df$variable[order(-mean_df$mean)]
)

# Fig. G8: the plot is kept in an object so that it can be handed to ggsave()
# explicitly instead of relying on ggplot2's "last plot" global state.
fig_g8 <- ggplot(mean_df, aes(x = variable, y = mean)) +
  geom_col(fill = "steelblue") +
  labs(title = "", y = "", x = "") +
  theme_minimal() +
  scale_x_discrete(labels = labels_dict) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Without width/height, ggsave() takes the size of whichever graphics device is
# currently open, so the saved file would vary between sessions. 7 x 7 in is
# the size ggsave() falls back to when no device is open.
# NOTE(review): adjust if the published figure used other dimensions.
ggsave(
  paste0(wd_res, "/figures/figG8.png"),
  plot = fig_g8,
  width = 7,
  height = 7,
  units = "in"
)

cat("\n====================\n")
cat("Saved Figure G8")
cat("\n====================\n")


################################################################################ 
#   Tables G18-G21
################################################################################ 

#   Interacted regressions by target group.
#   For each outcome, one model per target-group column is estimated in which
#   the treatment factor is fully interacted with that column; the models for
#   one outcome are written together to one LaTeX table.

# Treatment arm as a factor with "control" as the reference (first) level.
data$treat_aggregate <- factor(
  data$treat_aggregate,
  levels = c("control", "alert", "consequences", "empathy")
)

# Fit `outcome ~ treat_aggregate*mod` once per moderator column.
#
# df          data frame containing the outcome, `treat_aggregate` and the
#             moderator columns
# outcome     name of the outcome column (character scalar)
# moderators  character vector of moderator column names
#
# Returns a list of lm_robust() fits with HC3 standard errors, named by
# moderator.
fit_interacted_models <- function(df, outcome, moderators) {
  # Each moderator is copied into a column literally named `mod`, so every
  # model shares the same formula and therefore the same coefficient names.
  spec <- as.formula(paste0(outcome, " ~ treat_aggregate*mod"))
  fits <- lapply(moderators, function(moderator) {
    # `df` is modified on a local copy only; the caller's data is untouched.
    df$mod <- df[[moderator]]
    lm_robust(spec, data = df, se_type = "HC3")
  })
  setNames(fits, moderators)
}

# Write a list of fitted models to `<wd_res>/tables/<file_name>` with
# significance stars at the 10%, 5% and 1% levels.
save_interaction_table <- function(models, file_name) {
  modelsummary::modelsummary(
    models,
    stars = c("*" = .1, "**" = .05, "***" = .01),
    output = paste0(wd_res, "/tables/", file_name)
  )
}

# Table G18
MODELS_anchor <- fit_interacted_models(data, "anchor_deleted_12h", tchars)
save_interaction_table(MODELS_anchor, "tabG18_interacted_groups_anchor.tex")

# Table G19
MODELS_no <- fit_interacted_models(data, "no_hate_tweets_post_85", tchars)
save_interaction_table(MODELS_no, "tabG19_interacted_groups_number.tex")

# Table G20
MODELS_rate <- fit_interacted_models(data, "hate_rate_85", tchars)
save_interaction_table(MODELS_rate, "tabG20_interacted_groups_rate.tex")

# Table G21
MODELS_prob <- fit_interacted_models(data, "post_tweets_h_avg", tchars)
save_interaction_table(MODELS_prob, "tabG21_interacted_groups_prob.tex")

cat("\n====================\n")
cat("Saved Tables G18-G21")
cat("\n====================\n")
